
##################################################################################
#                         GOV 2001: REPLICATION PROJECT                          #
#  Replication Article: Lipsmeyer, Christine S. & Zhu, Ling (2011)  -            #
# "IMMIGRATION, GLOBALIZATION, AND UNEMPLOYMENT BENEFITS IN DEVELOPED EU STATES" #
#                       AJPS Vol. 55(3), Pp. 647-664                             #
#              Replication Authors: Diana Draghici & Jerome Hughes               #
#                       File Version: Tue, 24 Apr 2012                           #     
##################################################################################


#################################################################################
#   PERFORM MULTIPLE IMPUTATION ON JOINT DATA SET BASED ON ORIGINAL FILES       #    
#   [Downloaded from the sources indicated by the authors in the article]       #  
#################################################################################


#################################################################################
#       Preliminaries                                                           #                   
#################################################################################



## Set working directory 
setwd("//Users/DianaDraghici//Documents//HARVARD UNIVERSITY//Courses//Spring Semester 2012//GOV 2001//Replication Project//Lipsmeyer&Zhu_AJPS_2011//Additional Data")




# Libraries
library(foreign)
library(tseries)
library(plm)
library(AER)
library(zoo)
library(TSA)
library(pcse)
library(xtable)
library(lattice)
library(Zelig)
library(lmtest)
library(nlme)
library(mvtnorm)
library(graphics)
library(Matrix)
library(apsrtable)
library(arm)




   
#############################
#     Figure 2 on p. 656    #
#############################


### Create function to produce expected values & 95% CI across simulations
exp.v.fn <- function(setX, simB){
	Bhat <- setX%*%simB
    ev <- apply(Bhat,1,mean)
    lb95ci <- apply(Bhat,1,quantile,.025)
    ub95ci <- apply(Bhat,1,quantile,.975)
    exp.v <- cbind(ev,lb95ci,ub95ci)      
    return(exp.v)
    }
  


## MI Data 1


data1 <- read.dta("ajps1.dta")

# Convert to TSCS format 
Atsdata <- pdata.frame(data1, index=c("state", "year"), drop.index=FALSE, row.names=TRUE)




#### Data Management
### Computing lagged and first-differenced variables

  # Function to create lags
lag.fn <- function(x) {lag(x, 0:1)[,2]}
  # Apply function
Atsdata$L.i_entitlement <- lag.fn(Atsdata$i_entitlement)
Atsdata$L.i_socialcorp3 <-  lag.fn(Atsdata$i_socialcorp3)
Atsdata$L.leftgov <-  lag.fn(Atsdata$leftgov)

Atsdata$L.netimmrate <- lag.fn(Atsdata$netimmrate)
Atsdata$L.laborcost_new <- lag.fn(Atsdata$laborcost_new)
Atsdata$L.unemp <- lag.fn(Atsdata$unemp)
Atsdata$L.trade <- lag.fn(Atsdata$trade)
Atsdata$L.gdp <- lag.fn(Atsdata$gdp)
Atsdata$L.fdi <- lag.fn(Atsdata$fdi)


  # Function to create first-differences
diff.fn <- function(x) {diff(x, 0:1)[,2]}
  # Apply function
Atsdata$D.i_entitlement <- diff.fn(Atsdata$i_entitlement)
Atsdata$D.i_socialcorp3 <-  diff.fn(Atsdata$i_socialcorp3)
Atsdata$D.leftgov <-  diff.fn(Atsdata$leftgov)
    
Atsdata$D.netimmrate <- diff.fn(Atsdata$netimmrate)
Atsdata$D.laborcost_new <- diff.fn(Atsdata$laborcost_new)
Atsdata$D.unemp <- diff.fn(Atsdata$unemp)
Atsdata$D.trade <- diff.fn(Atsdata$trade)
Atsdata$D.gdp <- diff.fn(Atsdata$gdp)
Atsdata$D.fdi <- diff.fn(Atsdata$fdi)



# Old model interactions

Atsdata$Dimmrate_Dlabcost <- Atsdata$D.netimmrate * Atsdata$D.laborcost_new
Atsdata$Dimmrate_Lsocial <- Atsdata$D.netimmrate * Atsdata$L.i_socialcorp3
Atsdata$Dimmrate_Lleft <- Atsdata$D.netimmrate * Atsdata$L.leftgov



### Simple OLS regression

AMod1 <- lm(i_entitlement ~ L.i_entitlement + D.netimmrate + D.laborcost_new +
  Dimmrate_Dlabcost + Dimmrate_Lsocial + Dimmrate_Lleft +
  L.i_socialcorp3 + L.leftgov + D.unemp + D.trade + D.gdp  + D.fdi +
  as.factor(state), data=Atsdata)
summary(AMod1)



    
### Draw simulated Beta coefficients from a multivariate normal based on previoulsy estimated OLS model
AsimB <- t(rmvnorm(1000, mean=AMod1$coef, sigma=summary(AMod1)$cov.unscaled))   
  

### Set covariate levels 

 ## Low Level of Change in Integration
   # Change in integration held @ 10th pctile (= low level as defined by the authors on p. 655),  
   # change in immigration varies from min to max, 
   # all other covariates held @ their mean
AsetX_lo <- t(replicate(100, apply(model.matrix(AMod1), 2, mean, na.rm=TRUE), simplify=TRUE))
AsetX_lo[,"D.netimmrate"]  <- seq(min(Atsdata[,"D.netimmrate"], na.rm=TRUE), max(Atsdata[,"D.netimmrate"], na.rm=TRUE),  len=100)
AsetX_lo[,"D.laborcost_new"] <- quantile(Atsdata[,"D.laborcost_new"], .10, na.rm=TRUE)
AsetX_lo[,"Dimmrate_Dlabcost"] <- AsetX_lo[,"D.netimmrate"]*AsetX_lo[,"D.laborcost_new"] 
AsetX_lo[,"Dimmrate_Lsocial"] <- AsetX_lo[,"D.netimmrate"]*AsetX_lo[,"L.i_socialcorp3"] 
AsetX_lo[,"Dimmrate_Lleft"] <- AsetX_lo[,"D.netimmrate"]*AsetX_lo[,"L.leftgov"] 


## High Level of Change in Integration
   # Change in integration held @ 90th pctile (= high level as defined by the authors on p. 655),  
   # change in immigration varies from min to max, 
   # all other covariates held @ their mean
AsetX_hi <- t(replicate(100, apply(model.matrix(AMod1), 2, mean, na.rm=TRUE), simplify=TRUE))
AsetX_hi[,"D.netimmrate"]  <- seq(min(Atsdata[,"D.netimmrate"], na.rm=TRUE), max(Atsdata[,"D.netimmrate"], na.rm=TRUE),  len=100)
AsetX_hi[,"D.laborcost_new"] <- quantile(Atsdata[,"D.laborcost_new"], .90, na.rm=TRUE)
AsetX_hi[,"Dimmrate_Dlabcost"] <- AsetX_hi[,"D.netimmrate"]*AsetX_hi[,"D.laborcost_new"] 
AsetX_hi[,"Dimmrate_Lsocial"] <- AsetX_hi[,"D.netimmrate"]*AsetX_hi[,"L.i_socialcorp3"] 
AsetX_hi[,"Dimmrate_Lleft"] <- AsetX_hi[,"D.netimmrate"]*AsetX_hi[,"L.leftgov"] 


### Compute predicted values based on simulated coefficients and specified covariate levels
## Estimates for Panel (1) in Figure 2
Aest.lo <- exp.v.fn(AsetX_lo, AsimB)
## Estimates for Panel (2) in Figure 2
Aest.hi <- exp.v.fn(AsetX_hi, AsimB)




#############################################################################################



################ ORIGINAL DATA

# Set working directory 
setwd("//Users/DianaDraghici//Documents//HARVARD UNIVERSITY//Courses//Spring Semester 2012//GOV 2001//Replication Project//Lipsmeyer&Zhu_AJPS_2011")

# Load dataset 
data <- read.dta("AJPS Immigration Data.dta")
attach(data)

### Expand the dataset to create a full set of state-year observations 
# (equivalently to -tsfill, full- in Stata);
# this is b/c there are unbalanced panels and time series of unequal length in the data,
# which makes some computations and the use of some packages impossible
tscs <- cbind(rep(1:15, each=37),  rep(1971:2007,15))
colnames(tscs) <- c("state","year")
fulldata <- merge(tscs, data, by= c("state","year"), all=TRUE)

# Convert to TSCS format & attach
tsdata <- pdata.frame(fulldata, index=c("state", "year"), drop.index=FALSE, row.names=TRUE)
attach(tsdata)


#### Data Management
### Computing lagged and first-differenced variables

  # Function to create lags
lag.fn <- function(x) {lag(x, 0:1)[,2]}
  # Apply function
tsdata$L.i_entitlement <- lag.fn(tsdata$i_entitlement)
tsdata$L.i_socialcorp3 <-  lag.fn(tsdata$i_socialcorp3)
tsdata$L.leftgov <-  lag.fn(tsdata$leftgov)

  # Function to create first-differences
diff.fn <- function(x) {diff(x, 0:1)[,2]}
  # Apply function
tsdata$D.netimmrate <- diff.fn(tsdata$netimmrate)
tsdata$D.laborcost_new <- diff.fn(tsdata$laborcost_new)
tsdata$D.unemp <- diff.fn(tsdata$unemp)
tsdata$D.trade <- diff.fn(tsdata$trade)
tsdata$D.gdp <- diff.fn(tsdata$gdp)
tsdata$D.fdi <- diff.fn(tsdata$fdi)

### Simple OLS regression
Mod1 <- lm(i_entitlement ~ L.i_entitlement + D.netimmrate + D.laborcost_new +
  Dimmrate_Dlabcost + Dimmrate_Lsocial + Dimmrate_Lleft +
  L.i_socialcorp3 + L.leftgov + D.unemp + D.trade + D.gdp  + D.fdi +
  as.factor(state), data=tsdata)

    
### Draw simulated Beta coefficients from a multivariate normal based on previoulsy estimated OLS model
simB <- t(rmvnorm(1000, mean=Mod1$coef, sigma=summary(Mod1)$cov.unscaled))   
  

### Set covariate levels 

 ## Low Level of Change in Integration
   # Change in integration held @ 10th pctile (= low level as defined by the authors on p. 655),  
   # change in immigration varies from min to max, 
   # all other covariates held @ their mean
setX_lo <- t(replicate(100, apply(model.matrix(Mod1), 2, mean, na.rm=TRUE), simplify=TRUE))
setX_lo[,"D.netimmrate"]  <- seq(min(tsdata[,"D.netimmrate"], na.rm=TRUE), max(tsdata[,"D.netimmrate"], na.rm=TRUE),  len=100)
setX_lo[,"D.laborcost_new"] <- quantile(tsdata[,"D.laborcost_new"], .10, na.rm=TRUE)
setX_lo[,"Dimmrate_Dlabcost"] <- setX_lo[,"D.netimmrate"]*setX_lo[,"D.laborcost_new"] 
setX_lo[,"Dimmrate_Lsocial"] <- setX_lo[,"D.netimmrate"]*setX_lo[,"L.i_socialcorp3"] 
setX_lo[,"Dimmrate_Lleft"] <- setX_lo[,"D.netimmrate"]*setX_lo[,"L.leftgov"] 


## High Level of Change in Integration
   # Change in integration held @ 90th pctile (= high level as defined by the authors on p. 655),  
   # change in immigration varies from min to max, 
   # all other covariates held @ their mean
setX_hi <- t(replicate(100, apply(model.matrix(Mod1), 2, mean, na.rm=TRUE), simplify=TRUE))
setX_hi[,"D.netimmrate"]  <- seq(min(tsdata[,"D.netimmrate"], na.rm=TRUE), max(tsdata[,"D.netimmrate"], na.rm=TRUE),  len=100)
setX_hi[,"D.laborcost_new"] <- quantile(tsdata[,"D.laborcost_new"], .90, na.rm=TRUE)
setX_hi[,"Dimmrate_Dlabcost"] <- setX_hi[,"D.netimmrate"]*setX_hi[,"D.laborcost_new"] 
setX_hi[,"Dimmrate_Lsocial"] <- setX_hi[,"D.netimmrate"]*setX_hi[,"L.i_socialcorp3"] 
setX_hi[,"Dimmrate_Lleft"] <- setX_hi[,"D.netimmrate"]*setX_hi[,"L.leftgov"] 


### Compute predicted values based on simulated coefficients and specified covariate levels
## Estimates for Panel (1) in Figure 2
est.lo <- exp.v.fn(setX_lo, simB)
## Estimates for Panel (2) in Figure 2
est.hi <- exp.v.fn(setX_hi, simB)


############################################################################################


############################################################################################

### Generate plot
pdf("AJPS2011_F2.pdf", 10, 6)

par(mfrow=c(1,2))
### Panel 1: Original Data
 ## Low level
plot(0, type="n", xlim=range(-12,10), ylim=range(27,33),
     xlab ="Change in Immmigration", ylab="Predicted Level of Unemployment Entitlement")
polygon(x = c(setX_lo[,"D.netimmrate"], rev(setX_lo[,"D.netimmrate"])),
        y = c(est.lo[, "ub95ci"], rev(est.lo[,"lb95ci"])), col="#FF000050", border=NA)
lines(setX_lo[,"D.netimmrate"], est.lo[,"ev"], col="red", type="l", lwd=2, lty=1)
lines(setX_lo[,"D.netimmrate"], est.lo[,"lb95ci"], col="red", type="l", lwd=1, lty=2)
lines(setX_lo[,"D.netimmrate"], est.lo[,"ub95ci"], col="red", type="l", lwd=1, lty=2)
text(-2, 29, "Low Level of Change \n in Integration", col="red", cex=.6)
legend(-11,27.5, legend = c("95% Confidence Interval", "Prediction"), cex=.6,
       col = c("#FF000050", "red"), lty = c(0, 1), lwd = c(0, 2),
       pch = c(22, NA), pt.bg = c("#FF000050", NA), pt.cex = 3)       
  ## High level
polygon(x = c(setX_hi[,"D.netimmrate"], rev(setX_hi[,"D.netimmrate"])),
        y = c(est.hi[,"ub95ci"], rev(est.hi[,"lb95ci"])), col="#00868B50", border=NA)
lines(setX_hi[,"D.netimmrate"], est.hi[,"ev"], col="blue", type="l", lwd=2, lty=1)
lines(setX_hi[,"D.netimmrate"], est.hi[,"lb95ci"], col="blue", type="l", lwd=1, lty=2)
lines(setX_hi[,"D.netimmrate"], est.hi[,"ub95ci"], col="blue", type="l", lwd=1, lty=2)
text(-2, 31.5, "High Level of Change \n in Integration", col="blue", cex=.6)
legend(-1,27.5, legend = c("95% Confidence Interval", "Prediction"), cex=.6,
       col = c("#00868B50", "blue"), lty = c(0, 1), lwd = c(0, 2),
       pch = c(22, NA), pt.bg = c("#00868B50", NA), pt.cex = 3)
text(-1, 33, "Original Data Used by the Authors", col="black", cex=.9)


### Panel 2: MI Amelia Data
 ## Low level
plot(0, type="n", xlim=range(-12,10), ylim=range(23,33.5),
     xlab ="Change in Immmigration", ylab="Predicted Level of Unemployment Entitlement")
polygon(x = c(AsetX_lo[,"D.netimmrate"], rev(AsetX_lo[,"D.netimmrate"])),
        y = c(Aest.lo[, "ub95ci"], rev(Aest.lo[,"lb95ci"])), col="#FF000050", border=NA)
lines(AsetX_lo[,"D.netimmrate"], Aest.lo[,"ev"], col="red", type="l", lwd=2, lty=1)
lines(AsetX_lo[,"D.netimmrate"], Aest.lo[,"lb95ci"], col="red", type="l", lwd=1, lty=2)
lines(AsetX_lo[,"D.netimmrate"], Aest.lo[,"ub95ci"], col="red", type="l", lwd=1, lty=2)
text(5, 28, "Low Level of Change \n in Integration", col="red", cex=.6)
legend(-10,23.75, legend = c("95% Confidence Interval", "Prediction"), cex=.6,
       col = c("#FF000050", "red"), lty = c(0, 1), lwd = c(0, 2),
       pch = c(22, NA), pt.bg = c("#FF000050", NA), pt.cex = 3)       
  ## High level
polygon(x = c(AsetX_hi[,"D.netimmrate"], rev(AsetX_hi[,"D.netimmrate"])),
        y = c(Aest.hi[,"ub95ci"], rev(Aest.hi[,"lb95ci"])), col="#00868B50", border=NA)
lines(AsetX_hi[,"D.netimmrate"], Aest.hi[,"ev"], col="blue", type="l", lwd=2, lty=1)
lines(AsetX_hi[,"D.netimmrate"], Aest.hi[,"lb95ci"], col="blue", type="l", lwd=1, lty=2)
lines(AsetX_hi[,"D.netimmrate"], Aest.hi[,"ub95ci"], col="blue", type="l", lwd=1, lty=2)
text(-1, 32, "High Level of Change \n in Integration", col="blue", cex=.6)
legend(1,23.75, legend = c("95% Confidence Interval", "Prediction"), cex=.6,
       col = c("#00868B50", "blue"), lty = c(0, 1), lwd = c(0, 2),
       pch = c(22, NA), pt.bg = c("#00868B50", NA), pt.cex = 3)
text(-1, 33.5, "Multiply Imputed Data Using Amelia II", col="black", cex=.9)
       
      
  ## Add plot title
par(mfrow=c(1,1))
mtext("FIGURE 2: Levels of Change in Integration Across the Range of Change in Immigration \n All other variables are held at their mean values \n")

dev.off()

#########  End of Figure 2 ########


